Fires in NYC and FDNY Response

Tasks

1. Location of Severe Fires

Provide a leaflet map of the highest severity fires (i.e. subset to the highest category in HIGHEST_LEVEL_DESC) contained in the file buiding_fires.csv. Ignore locations that fall outside the five boroughs of New York City. Provide at least three pieces of information on the incident in a popup.

library(leaflet)

library(RColorBrewer)

borough <- c("1 - Manhattan", "2 - Bronx", "3 - Staten Island", "4 - Brooklyn", "5 - Queens") #Select only cases inside the 5 boroughs

fire_subset <- fire %>%
  filter(HIGHEST_LEVEL_DESC == '75 - All Hands Working' & BOROUGH_DESC %in% borough) #Subsetting the dataframe fire on 75 - All Hands Working

pal = colorFactor("Set1", domain = fire_subset$BOROUGH_DESC) # Grab a palette, a color per borough

color_borough = pal(fire_subset$BOROUGH_DESC)

content <- paste("When:",fire_subset$INCIDENT_DATE_TIME,"<br/>",
                 "Address:",fire_subset$address,"<br/>",
                 "Property Type:",fire_subset$PROPERTY_USE_DESC,"<br/>")

m <- leaflet(fire_subset) %>%  # Create a map widget
  addTiles('http://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}.png') %>%
    setView(-73.9949344, 40.7179112, zoom = 11)

m %>% addCircleMarkers(color = color_borough, weight = 4, popup = content, radius = 1) %>%
  addLegend(pal = pal, values = ~fire_subset$BOROUGH_DESC, title = "Boroughs")

2. Layers and Clusters

a) Color by Type of Property

Start with the previous map. Now, distinguish the markers of the fire locations by PROPERTY_USE_DESC, i.e. what kind of property was affected. If there are too many categories, collapse some categories. Choose an appropriate coloring scheme to map the locations by type of affected property. Add a legend informing the user about the color scheme. Also make sure that the information about the type of affected property is now contained in the popup information. Show this map.

pal2 = colorFactor("Set2", domain = fire_subset$PROPERTY_USE_DESC) #Different color for each property type

color_prop = pal2(fire_subset$PROPERTY_USE_DESC)

content2 <- paste("When:",fire_subset$INCIDENT_DATE_TIME,"<br/>",
                 "Address:",fire_subset$address,"<br/>",
                 "Borough:", fire_subset$BOROUGH_DESC, "<br/>",
                 "Property Type:",fire_subset$PROPERTY_USE_DESC,"<br/>")

m %>% addCircleMarkers(color = color_prop, weight = 4, popup = content2, opacity = 1, radius = 1) %>%
  addLegend("topleft", pal = pal2, values = ~fire_subset$PROPERTY_USE_DESC, title = htmltools::HTML("Property Type"), opacity = 1)
b) Cluster

Add marker clustering, so that zooming in will reveal the individual locations but the zoomed out map only shows the clusters. Show the map with clusters.

m %>% addCircleMarkers(color = color_prop, 
                       popup = content2,
                       clusterOptions = markerClusterOptions())

3. Fire Houses

The second data file contains the locations of the 218 firehouses in New York City. Start with the non-clustered map (2b) and now adjust the size of the circle markers by severity (TOTAL_INCIDENT_DURATION or UNITS_ONSCENE seem plausible options). More severe incidents should have larger circles on the map. On the map, also add the locations of the fire houses. Add two layers (“Incidents”, “Firehouses”) that allow the user to select which information to show.

content_firehouse <- paste("Firehouse Name:",firehouses$FacilityName,"<br/>",
                 "Address:",firehouses$FacilityAddress,"<br/>")

#icons for firehouses
firehouseIcons <- icons(
  iconUrl = "./icons/pin-sharp.svg",
  iconWidth = 15, iconHeight = 15,
  iconAnchorX = 7.5, iconAnchorY = 8.5
  )

m_l1 = leaflet() %>% 
 setView(-73.9949344, 40.7179112, zoom = 15) %>%
 # Base groups = Background layer
 addTiles('http://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}.png', group = "base") %>%
 # Data Layers
 ## First Data Layer: Incidents
  addCircleMarkers(data = fire_subset, color = color_prop, group = "Incidents", 
                       popup = content2,
                       clusterOptions = markerClusterOptions(),
                   opacity = 0.9, 
                   fillOpacity = 0.9, 
                   radius = fire_subset$UNITS_ONSCENE) %>%
  ## Second Data Layer: Firehouses
  addMarkers(data = firehouses,
             icon = firehouseIcons,
             group="Firehouses",
             popup = content_firehouse) %>%
  addLayersControl(
    baseGroups = c("base"),
  overlayGroups = c("Incidents","Firehouses"),
  options = layersControlOptions(collapsed = TRUE))

m_l1

4. Distance from Firehouse and Response Time

We now want to investigate whether the distance of the incident from the nearest firehouse varies across the city.

a) Calculate Distance

For all incident locations (independent of severity), identify the nearest firehouse and calculate the distance between the firehouse and the incident location. Provide a scatter plot showing the time until the first engine arrived (the variables INCIDENT_DATE_TIME and ARRIVAL_DATE_TIME) will be helpful.

library(geosphere)

#For each fire incident
for(i in 1:nrow(fire)){
  #row number of the nearest firestation
  r <- apply(distm(fire[,c(26,27)][i,], firehouses[,c(6,5)], fun = distHaversine), 1, which.min)
  
  #distance of the nearest firestation
  dist_min <- apply(distm(fire[,c(26,27)][i,], firehouses[,c(6,5)], fun = distHaversine), 1, min)
  
  #column to store nearest firestation name
  fire$NEAREST_STATION[i] <- firehouses$FacilityName[r]
  
  fire$DISTANCE[i] <- dist_min
}
library(lubridate)

#time_taken_to_arrive is in seconds
fire$TIME_TAKEN_TO_ARRIVE = parse_date_time(fire$ARRIVAL_DATE_TIME, "%m/%d/%Y %H:%M:%S %p", tz = "EST") -
parse_date_time(fire$INCIDENT_DATE_TIME, "%m/%d/%Y %H:%M:%S %p", tz = "EST")

#Converting time in minutes
fire$TIME_TAKEN_TO_ARRIVE <- round(fire$TIME_TAKEN_TO_ARRIVE/60, digits = 2)

#Converting distance into kilometers
fire$DISTANCE <- round(fire$DISTANCE/1000, digits = 2)

Note :Arrival date time is missing for 21 incidents.

library(ggplot2)

library(ggthemes)

plot1 <- ggplot(data = fire, aes(x = TIME_TAKEN_TO_ARRIVE, y = DISTANCE)) +
  geom_point() +
  theme_tufte() +
  labs(title = "Mapping Fire Incidents and FDNY Response Times", 
       subtitle = "Relation between arrival time of fire engines and distance from a firehouse", 
       caption = "source: NYC data") +
  scale_x_discrete(name ="Arrival time of the first fire engine (in min)", 
                   limits=c(1,5,10,15,20,25,30,35,40, 50, 60, 70, 80, 90, 100)) +
  ylab("Distance to the nearest firehouse (in km)")

plot1

Note :There are multiple outliers which are making it difficult to see the trend.

Removing the outliers for a better graph.

fire_better <- fire %>%
  filter(DISTANCE < 25 & 
           TIME_TAKEN_TO_ARRIVE < 25 &
           TIME_TAKEN_TO_ARRIVE > 0)

plot2 <- ggplot(data = fire_better, aes(x = TIME_TAKEN_TO_ARRIVE, y = DISTANCE)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_tufte() +
  labs(title = "Mapping Fire Incidents and FDNY Response Times",
       subtitle = "Relation between arrival time of fire engines and distance from a firehouse",
       caption = "source: NYC data") +
  scale_x_discrete(name ="Arrival time of the first fire engine (in min)",
                   limits=c(1,5,10,15,20,25)) +
  ylab("Distance to the nearest firehouse (in km)")

plot2

Now also visualize the patterns separately for severe and non-severe incidents (use HIGHEST_LEVEL_DESC but feel free to reduce the number of categories). What do you find?

plot3 <- ggplot(data = fire_better, aes(x = TIME_TAKEN_TO_ARRIVE, y = DISTANCE)) +
  geom_point(aes(color = COMBINED_LEVEL_DESC)) +
  facet_wrap(COMBINED_LEVEL_DESC ~ ., nrow = 2) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(title = "Mapping Fire Incidents and FDNY Response Times",
       subtitle = "Relation between arrival time of fire engines and distance from a firehouse", 
       caption = "source: NYC data") +
  scale_x_discrete(name ="Arrival time of the first fire engine (in min)", 
                   limits=c(1,5,10,15,20,25)) +
  ylab("Distance to the nearest firehouse (in km)") +
  scale_color_discrete(name = "Incident Severity")

plot3

fire_better <- fire_better %>%
  filter(!is.na(fire_better$COMBINED_LEVEL_DESC))

plot4 <- ggplot(data = fire_better, aes(x = TIME_TAKEN_TO_ARRIVE,
                                        y = DISTANCE, 
                                        color = COMBINED_LEVEL_DESC)) +
  geom_smooth(method = "lm",
              se = FALSE) +
  labs(title = "Mapping Fire Incidents and FDNY Response Times", 
       subtitle = "Relation between arrival time of fire engines and distance from a firehouse",
       caption = "source: NYC data") +
  scale_x_discrete(name ="Arrival time of the first fire engine (in min)",
                   limits=c(1,5,10,15,20)) +
  ylab("Distance to the nearest firehouse (in km)") +
  scale_color_discrete(name = "Incident Severity")

plot4

The slopes help compare if the incident severity matters in the relation between arrival time and the incident distance.

Second Alarm incidences have the highest slope but there are lesser number of incidences for second alarm.

Highest alarm incidences have a higher slope than first alarm incidences. This implies that the first fire engine reaches similar distance faster if the incidence is of highest severity compared to first alarm severity.

b) Map of Response Times

Provide a map visualization of response times. Investigate whether the type of property affected (PROPERTY_USE_DESC) or fire severity (HIGHEST_LEVEL_DESC) play a role here.

#Select only cases inside the 5 boroughs and only required columns
fire_subset1 <- fire %>%
  filter(BOROUGH_DESC %in% borough) %>%
  select(TIME_TAKEN_TO_ARRIVE, PROPERTY_USE_DESC, HIGHEST_LEVEL_DESC, lon, lat)
#icons
pinIcons <- icons(
  iconUrl = "./icons/pin-sharp.svg",
  iconWidth = 15, iconHeight = 15,
  iconAnchorX = 7.5, iconAnchorY = 8.5
)
pal3 = colorFactor("RdYlGn", domain = factor(fire_subset1$TIME_TAKEN_TO_ARRIVE), reverse = TRUE) # Grab a palette

content_prop <- paste("Property Type:",fire_subset1$PROPERTY_USE_DESC,"<br/>",
                      "Arrival Time (in mins):", fire_subset1$TIME_TAKEN_TO_ARRIVE, "<br/>")

content_sev <- paste("Severity:", fire_subset1$HIGHEST_LEVEL_DESC, "<br/>",
                     "Arrival Time (in mins):", fire_subset1$TIME_TAKEN_TO_ARRIVE, "<br/>")
1. Type of property
leaflet() %>%
  setView(-73.9949344, 40.7179112, zoom = 15) %>%
  addTiles('http://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}.png') %>%
  addMarkers(data = fire_subset1, 
             group = "Property Type", 
             lng = fire_subset1$lon,
             lat = fire_subset1$lat,
             popup = content_prop,
             icon = pinIcons
             ) %>%
  addCircleMarkers(color = pal3(fire_subset1$TIME_TAKEN_TO_ARRIVE),
                   lng = fire_subset1$lon,
                   lat = fire_subset1$lat,
                   opacity = 0.3,
                   fillOpacity = 0.3,
                   radius = fire_subset1$TIME_TAKEN_TO_ARRIVE)
2. Severity levels
leaflet() %>%
  setView(-73.9949344, 40.7179112, zoom = 15) %>%
  addTiles('http://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}.png') %>%
  #Data Layers
  addCircleMarkers(data = fire_subset1,
             lng = fire_subset1$lon,
             lat = fire_subset1$lat,
             popup = content_sev,
             color = pal3(fire_subset1$TIME_TAKEN_TO_ARRIVE),
             radius = ~ifelse(fire_subset1$HIGHEST_LEVEL_DESC == "1- First Alarm" | HIGHEST_LEVEL_DESC == "0 - Initial alarm", 6, ifelse(fire_subset1$HIGHEST_LEVEL_DESC == "2- Second Alarm" | fire_subset1$HIGHEST_LEVEL_DESC == "3- Third Alarm", 10, ifelse(fire_subset1$HIGHEST_LEVEL_DESC == "4- Fourth Alarm" | fire_subset1$HIGHEST_LEVEL_DESC == "5- Fifth Alarm", 14, 18))),
             fillOpacity = 0.8)
#Creating the required summarised dataframe
df_fire_summary <- fire %>%
  select(INCIDENT_DATE_TIME, BOROUGH_DESC, TIME_TAKEN_TO_ARRIVE)

df_fire_summary$INCIDENT_YEAR <- year(parse_date_time(fire$INCIDENT_DATE_TIME, "%m/%d/%Y %H:%M:%S %p", tz = "EST"))

drops <- c("INCIDENT_DATE_TIME")
df_fire_summary <- df_fire_summary[, !(names(df_fire_summary) %in% drops)]

df_fire_summary1 <- df_fire_summary %>% 
  dplyr::group_by(INCIDENT_YEAR, BOROUGH_DESC) %>% 
  dplyr::summarise(MEAN_RESPONSE_TIME = round(mean(TIME_TAKEN_TO_ARRIVE, na.rm = TRUE), digits = 2))
#Splitting the dataframe by year
df_splitted_by_year <- df_fire_summary1 %>%
   split(.$INCIDENT_YEAR)

#creating a copy of the spatial data frame to join the summary time data of all the years
nyc_all <- nyc

yr <- 2013

for(tib in df_splitted_by_year){
  nyc_all <- merge(nyc_all, tib, by.x = "boro_code", by.y = "BOROUGH_DESC")
  names(nyc_all)[names(nyc_all) == 'INCIDENT_YEAR'] <- paste("YEAR_", yr, sep = "") 
  names(nyc_all)[names(nyc_all) == 'MEAN_RESPONSE_TIME'] <- paste("MEAN_RESPONSE_TIME_", yr, sep = "")
  yr = yr + 1
}

Show a faceted choropleth map indicating how response times have developed over the years. What do you find?

layout <- tm_layout(  
          legend.title.size = 1,
          legend.text.size = 0.6,
          legend.position = c(0.8,0),
          legend.bg.color = "white",
          legend.bg.alpha = 1,
          bg.color="white",
          frame=FALSE)

tm_shape(nyc_all) + 
  layout + 
    tm_polygons(c("MEAN_RESPONSE_TIME_2013", "MEAN_RESPONSE_TIME_2014", "MEAN_RESPONSE_TIME_2015", "MEAN_RESPONSE_TIME_2016", "MEAN_RESPONSE_TIME_2017", "MEAN_RESPONSE_TIME_2018"), 
        style=c("pretty"),
        palette=c("YlOrBr"),
        auto.palette.mapping=FALSE,
        title=c("Avg Response Time 2013", "Avg Response Time 2014", "Avg Response Time 2015", "Avg Response Time 2016", "Avg Response Time 2017", "Avg Response Time 2018")) +
  tm_style_white() +
  tm_text("boro_name", size=.6, shadow=TRUE,
             bg.color="white", bg.alpha=.25,
             remove.overlap=TRUE)